Noorderpoort
np_domain <- query_db("SELECT * FROM domain WHERE name = 'noorderpoort.memorylab.app';", database = "slimstampen")
Users registered on this domain:
np_users <- query_db(paste0("SELECT id AS user_id, email FROM users WHERE domain_id = ", np_domain$id, ";"), database = "slimstampen")
Lessons on this domain:
np_lessons <- query_db(paste0("SELECT * FROM lesson WHERE domain_id = ", np_domain$id, ";"), database = "slimstampen")
Sessions from users on this domain during the pilot period:
np_sessions <- query_db(paste0("SELECT * FROM session WHERE token_id = 2 AND user_id IN (", paste(np_users$user_id, collapse = ", "), ");"), database = "ssaas")
np_sessions <- np_sessions[create_time > "2024-10-06"][create_time < "2024-11-05"]
When were these sessions?
np_sessions[, session_date := as.Date(create_time)]
np_test_dates <- data.table(test = c("Toets 1", "Toets 2"),
date = as.Date(c("2024-10-07", "2024-11-04")))
ggplot(np_sessions, aes(x = session_date)) +
geom_vline(data = np_test_dates, aes(xintercept = date), linetype = "dashed", colour = "grey50") +
geom_label(data = np_test_dates, aes(x = date, y = Inf, label = test), vjust = 1.05, hjust = .5, colour = "grey50") +
geom_histogram(binwidth = 1, fill = colours_memorylab[1]) +
labs(x = "Datum", y = "MemoryLab oefensessies per dag", title = "MemoryLab oefenactiviteit over de tijd") +
scale_x_date(date_labels = "%d/%m", date_breaks = "1 week", limits = c(as.Date("2024-10-06"), as.Date("2024-11-05"))) +
scale_y_continuous(expand = c(0, 0)) +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey90"))
ggsave(here("output", "memorylab_oefensessies_over_tijd.png"), width = 10, height = 5)

Most popular days:
np_sessions[, .N, by = .(session_date)][order(-N)]
Total sessions:
nrow(np_sessions)
[1] 696
Sessions per user:
np_sessions[, .N, by = user_id][order(-N)]
Total users with at least one session:
length(unique(np_sessions$user_id))
[1] 108
Which lessons did users do? Parse the session context:
np_sessions[, context_parsed := map(context, function (x) {
x <- fromJSON(x)
data.table(lesson_id = x$lessonId,
lesson_group_id = x$lessonGroupId,
title = x$title)
})]
np_sessions_parsed <- np_sessions[, rbindlist(context_parsed), by = .(session_id = id, user_id, create_time, session_date)]
Practice by lesson:
np_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Les = title)][order(-`Keer geoefend`)] |> knitr::kable()
| Tafels_les 1 |
104 |
| Afronden op decimalen |
75 |
| Procenten |
69 |
| Optellen |
51 |
| Breuken verkennen |
50 |
| Afronden op hele getallen |
49 |
| Aftrekken |
45 |
| Delen |
38 |
| Getallen en cijfers |
36 |
| Tafels_les 2 |
26 |
| Vermenigvuldigen met grote getallen |
25 |
| Breuken vermenigvuldigen |
23 |
| Breuken versimpelen |
19 |
| Rekentaal - Belangrijke woorden & Afkortingen
(1) |
16 |
| Decimale getallen |
12 |
| Tafels_les 5 |
11 |
| Grootheden |
10 |
| Rekentaal - Meetkunde |
7 |
| Metriek stelsel |
7 |
| Tafels_les 3 |
6 |
| Vermenigvuldigen met kommagetallen |
5 |
| Decimale getallen + en - |
3 |
| Rekentaal - Getalbegrippen (1) |
2 |
| Tafels_les 4 |
2 |
| Rekentaal - Belangrijke woorden & Afkortingen
(3) |
2 |
| Rekentaal - Symbolen en tekens |
1 |
| Rekentaal - Belangrijke woorden & Afkortingen
(2) |
1 |
| Rekentaal - Ruimtebegrippen (1) |
1 |
We can link each practice session to one of the test topics:
np_lesson_groups <- data.table(
lesson_group_id = c(
29092,
29087,
29096,
29089,
29098,
29086,
29097,
29095,
29090,
29088,
29094,
29091),
topic = c(
"Rekentaal",
"Breuken",
"Percentage",
"Eenheden",
"Delen",
"Afronden",
"Cijfers",
"Vermenigvuldigen",
"Aftrekken & Optellen",
"Decimalen",
"Tafels",
"Rekentaal"
)
)
# Set lesson topics to the right order
np_lesson_groups[, topic := factor(topic, levels = c("Delen",
"Percentage",
"Cijfers",
"Breuken",
"Tafels",
"Decimalen",
"Aftrekken & Optellen",
"Vermenigvuldigen",
"Afronden",
"Eenheden",
"Rekentaal"))]
np_lessons <- merge(np_lessons, np_lesson_groups, by = "lesson_group_id")
np_sessions_parsed <- merge(np_sessions_parsed, np_lesson_groups, by = "lesson_group_id")
Practice by lesson topic:
np_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Onderwerp = topic)][order(-`Keer geoefend`)] |> knitr::kable()
| Tafels |
149 |
| Afronden |
124 |
| Aftrekken & Optellen |
96 |
| Breuken |
92 |
| Percentage |
69 |
| Delen |
38 |
| Cijfers |
36 |
| Rekentaal |
30 |
| Vermenigvuldigen |
30 |
| Eenheden |
17 |
| Decimalen |
15 |
Bar plot of sessions per topic:
np_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Onderwerp = topic)] |>
ggplot(aes(x = Onderwerp, y = `Keer geoefend`)) +
geom_col(fill = colours_memorylab[1]) +
labs(x = "Onderwerp", y = "Keer geoefend", caption = "Data from noorderpoort.memorylab.app") +
theme_ml()

Mastery credits:
np_credits <- query_db(paste0("SELECT * FROM lesson_mastered WHERE user_id IN (", paste(np_users$user_id, collapse = ", "), ");"), database = "slimstampen")
# Add lesson titles and topics
np_credits <- merge(np_credits, np_lessons[, .(lesson_id = id, title, topic)])
Credits by topic:
np_credits[, .N, by = .(topic)][order(-N)]
Responses from these user_ids:
np_responses <- query_db(paste0("SELECT * FROM response WHERE token_id = 2 AND user_id IN (", paste(np_users$user_id, collapse = ", "), ");"), database = "ssaas")
np_responses <- np_responses[create_time > "2024-10-06"][create_time < "2024-11-05"]
# Add lesson titles and topics
np_responses <- merge(np_responses, np_sessions_parsed[, .(session_id, title, topic)], by = "session_id")
Responses by user:
np_responses[, .N, by = .(user_id)][order(-N)]
Responses by topic:
np_responses[, .N, by = .(topic)][order(-N)]
ggplot(np_responses, aes(x = as.Date(create_time))) +
geom_histogram(binwidth = 1, fill = colours_memorylab[1]) +
labs(x = "Date", y = "Responses per day", caption = "Data from noorderpoort.memorylab.app") +
scale_y_continuous(expand = c(0, 0), labels = scales::number_format(big.mark = ",")) +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey90"))

Split by lesson topic:
ggplot(np_responses, aes(x = as.Date(create_time))) +
geom_histogram(aes(fill = topic), binwidth = 1) +
labs(x = "Date", y = "Responses per day", fill = "Onderwerp", caption = "Data from noorderpoort.memorylab.app") +
scale_y_continuous(expand = c(0, 0), labels = scales::number_format(big.mark = ",")) +
scale_fill_viridis_d() +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey90"))

How does study behaviour on specific topics relate to test
performance?
We want to see whether studying a specific topic is related to an
increase in test performance. Studying behaviour can be summarised in
several ways: time spent, number of sessions, number of questions
answered, number of credits achieved.
np_session_stats <- np_responses[, .(
n_responses = .N,
duration = max(presentation_start_time) + presentation_duration[which.max(presentation_start_time)] - min(presentation_start_time),
accuracy = mean(correct)
), by = .(user_id, topic, session_id)]
np_practice_stats <- np_session_stats[, .(
n_sessions = .N,
n_responses = sum(n_responses),
duration = sum(duration),
accuracy = mean(accuracy)
), by = .(user_id, topic)]
Load test scores per topic:
np_test_scores <- fread(here("data", "test", "noorderpoort_scores_by_topic.csv"))
np_test_scores[, Email := tolower(trimws(Email))]
# Link to MemoryLab user IDs
np_test_scores <- merge(np_test_scores, np_users, by.x = "Email", by.y = "email", all = TRUE)
There are some test scores for which we don’t have any associated
MemoryLab data:
np_test_scores[is.na(user_id), .(unique(Email))]
There are also some MemoryLab users for which we don’t have any
associated test scores:
np_test_scores[is.na(component), .(unique(Email))]
For this analysis we’ll only include users of whom we have two test
scores as well as some MemoryLab practice data.
np_test_scores[, did_ml := !is.na(user_id)]
np_test_scores[, two_tests := uniqueN(test) == 2, by = .(user_id)]
np_test_scores[, include_user := did_ml & two_tests]
Mean test scores from included students:
Distribution of test scores:

Do a paired t-test to show that the difference is significant:
t.test(test_score_dat$Posttest, test_score_dat$Pretest, paired = TRUE)
Paired t-test
data: test_score_dat$Posttest and test_score_dat$Pretest
t = -5.2446, df = 73, p-value = 1.47e-06
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-2.741364 -1.231609
sample estimates:
mean difference
-1.986486
Combine data:
np_scores <- np_test_scores[include_user == TRUE & !component %in% c("Totaal punten", "Cijfer"), .(
user_id,
topic = component,
score,
test
)]
np_scores <- dcast(np_scores, user_id + topic ~ test, value.var = "score")
setnames(np_scores, c("Posttest", "Pretest"), c("score_test_2", "score_test_1"))
np_scores[, score_test_change := score_test_2 - score_test_1]
# np_scores[, topic := factor(topic, levels = c("Delen",
# "Percentage",
# "Cijfers",
# "Breuken",
# "Tafels",
# "Decimalen",
# "Aftrekken & Optellen",
# "Vermenigvuldigen",
# "Afronden",
# "Eenheden",
# "Rekentaal"))]
np_scores_and_practice <- merge(np_scores, np_practice_stats, by = c("user_id", "topic"), all.x = TRUE)
# If a user has no practice data, we'll fill in zeros
np_scores_and_practice[is.na(n_sessions), n_sessions := 0]
np_scores_and_practice[is.na(n_responses), n_responses := 0]
np_scores_and_practice[is.na(duration), duration := 0]
Plot of scores:
mean_scores <- np_scores_and_practice[, .(score_test_1 = mean(score_test_1), score_test_2 = mean(score_test_2)), by = .(topic)] |>
melt(id.vars = "topic", variable.name = "test", value.name = "score")
p_scores <- melt(np_scores_and_practice, measure.vars = c("score_test_1", "score_test_2"), variable.name = "test", value.name = "score") |>
ggplot(aes(x = test, y = score)) +
facet_wrap(~ topic, ncol = 5) +
geom_point(alpha = .4, size = .5) +
geom_line(aes(group = user_id), alpha = .4, lty = 3) +
geom_point(data = mean_scores, colour = colours_memorylab[1], size = 2.5) +
geom_line(data = mean_scores, aes(group = topic), colour = colours_memorylab[1], lwd = 1) +
scale_x_discrete(labels = c("1", "2")) +
labs(x = "Toetsmoment", y = "Score", title = "Toetsscores") +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey90"),
strip.text = element_text(face = "bold")
)
p_scores
ggsave(here("output", "testscores_noorderpoort.png"), width = 8, height = 5)

How much was each topic practiced?
ggplot(np_scores_and_practice, aes(x = n_responses)) +
facet_wrap(~ topic, ncol = 5) +
geom_histogram(binwidth = 20, fill = colours_memorylab[1]) +
labs(x = "Number of practice responses per student", y = "Frequency", colour = "Topic", caption = "Data from noorderpoort.memorylab.app") +
theme_ml()

Did students choose to practice topics on which their pretest score
was low?
# Add mean pretest scores per topic
mean_pretest_scores <- mean_scores[test == "score_test_1", .(topic, mean_score = round(score, 2))]
np_scores_and_practice <- merge(np_scores_and_practice, mean_pretest_scores, by = "topic")
np_scores_and_practice[, topic_label := paste0(topic, "\n(Gemiddelde score: ", mean_score, ")")]
ggplot(np_scores_and_practice, aes(x = score_test_1, y = n_responses)) +
facet_wrap(~ topic_label, ncol = 5) +
geom_point(aes(fill = as.factor(score_test_1)), colour = "black", alpha = .8, position = position_jitter(height = 0, width = .1, seed = 0), pch = 21) +
scale_fill_brewer(palette = "RdYlGn") +
guides(fill = "none") +
labs(x = "Score op Toets 1", y = "Aantal gemaakte MemoryLab oefeningen", colour = "Onderwerp", caption = "noorderpoort.memorylab.app") +
theme_ml() +
theme(panel.grid.major.x = element_line(colour = "grey90"),
panel.grid.major.y = element_line(colour = "grey90"),
strip.text = element_text(face = "bold"))

Interpretation: not really.
Same plot but with totals instead of individual values:
p_practice <- np_scores_and_practice[, .(n_sessions_total = sum(n_sessions)), by = .(topic_label, score_test_1)] |>
ggplot(aes(x = score_test_1, y = n_sessions_total)) +
facet_wrap(~ topic_label, ncol = 5) +
geom_col(aes(fill = as.factor(score_test_1)), colour = "black", alpha = .8) +
scale_fill_brewer(palette = "RdYlGn") +
guides(fill = "none") +
labs(x = "Score op Toets 1", y = "Aantal MemoryLab oefensessies", colour = "Onderwerp", title = "Oefenactiviteit") +
theme_ml() +
theme(panel.grid.major.x = element_line(colour = "grey90"),
panel.grid.major.y = element_line(colour = "grey90"),
strip.text = element_text(face = "bold"))
p_practice
ggsave(here("output", "memorylab_oefensessies_noorderpoort.png"), width = 9, height = 5)

Combined plot:
library(patchwork)
p_scores + p_practice + plot_layout(ncol = 1)
ggsave(here("output", "memorylab_oefening_en_scores_noorderpoort.png"), width = 10, height = 10)

Is there a relation between score change and the number of practice
sessions?
np_scores_and_practice[, .(n_sessions = sum(n_sessions), score_test_change = mean(score_test_change)), by = .(topic_label)]
ggplot(np_scores_and_practice, aes(x = n_sessions, y = score_test_change)) +
facet_wrap(~ topic) +
geom_smooth(method = "lm", colour = colours_memorylab[1]) +
geom_point(alpha = .25) +
labs(x = "Number of practice sessions", y = "Change in test score", colour = "Topic", caption = "Data from noorderpoort.memorylab.app") +
scale_colour_viridis_d() +
theme_ml()

It looks like there might be a positive effect of practice. Let’s
look at it more simply: Is there a relation between score change and
whether or not the student has practiced the topic at all?

Average change:
np_scores_and_practice[, .(N = .N, mean_score_test_change = mean(score_test_change), sd_score_test_change = sd(score_test_change)), by = .(topic, topic_label, did_practice)]
Is there a significant difference in score change between students
who practiced and those who didn’t, taking into account differences in
score on the first test?
library(lmerTest)
lmer(score_test_change ~ did_practice*score_test_1 + (1 | user_id), data = np_scores_and_practice) |>
summary()
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: score_test_change ~ did_practice * score_test_1 + (1 | user_id)
Data: np_scores_and_practice
REML criterion at convergence: 1900.3
Scaled residuals:
Min 1Q Median 3Q Max
-3.9140 -0.5643 0.2549 0.5545 3.0879
Random effects:
Groups Name Variance Std.Dev.
user_id (Intercept) 0.03115 0.1765
Residual 0.72498 0.8515
Number of obs: 740, groups: user_id, 74
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.73169 0.17700 728.08196 4.134 3.98e-05 ***
did_practiceTRUE 0.95870 0.32490 732.05595 2.951 0.00327 **
score_test_1 -0.30359 0.05005 735.64156 -6.066 2.09e-09 ***
did_practiceTRUE:score_test_1 -0.17258 0.09070 733.75382 -1.903 0.05746 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) dd_TRUE scr__1
dd_prctTRUE -0.537
score_tst_1 -0.968 0.526
dd_TRUE:__1 0.533 -0.978 -0.551
Yes: practicing is associated with an increase in score change of
.96; scoring a point higher on test 1 is associated with a lower score
change (-.30).
lmer(score_test_change ~ did_practice*topic + (1 | user_id), data = np_scores_and_practice) |>
summary()
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: score_test_change ~ did_practice * topic + (1 | user_id)
Data: np_scores_and_practice
REML criterion at convergence: 1879.5
Scaled residuals:
Min 1Q Median 3Q Max
-4.4845 -0.4243 0.0835 0.3943 3.7491
Random effects:
Groups Name Variance Std.Dev.
user_id (Intercept) 0.03031 0.1741
Residual 0.69869 0.8359
Number of obs: 740, groups: user_id, 74
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.35524 0.15045 719.99346 -2.361 0.01848 *
did_practiceTRUE 0.64970 0.19920 714.24055 3.261 0.00116 **
topicAftrekken & Optellen 0.23774 0.20291 678.50225 1.172 0.24177
topicBreuken 0.28800 0.19954 683.33089 1.443 0.14939
topicCijfers -0.05404 0.18621 677.28939 -0.290 0.77175
topicDecimalen 0.29335 0.18096 687.94394 1.621 0.10546
topicDelen 0.26533 0.18722 687.45604 1.417 0.15686
topicEenheden -0.81664 0.17952 683.22964 -4.549 6.38e-06 ***
topicPercentage 0.04765 0.19666 689.08359 0.242 0.80861
topicTafels 0.42404 0.23702 696.66575 1.789 0.07404 .
topicVermenigvuldigen 0.31759 0.18590 684.84812 1.708 0.08802 .
did_practiceTRUE:topicAftrekken & Optellen -0.60389 0.27897 698.77735 -2.165 0.03075 *
did_practiceTRUE:topicBreuken -0.23866 0.28000 706.18970 -0.852 0.39432
did_practiceTRUE:topicCijfers -0.52268 0.30284 700.47656 -1.726 0.08480 .
did_practiceTRUE:topicDecimalen -0.85266 0.39312 719.27378 -2.169 0.03041 *
did_practiceTRUE:topicDelen -0.51008 0.30142 715.64178 -1.692 0.09103 .
did_practiceTRUE:topicEenheden -0.46998 0.47989 714.68596 -0.979 0.32774
did_practiceTRUE:topicPercentage -0.52837 0.28220 713.57571 -1.872 0.06157 .
did_practiceTRUE:topicTafels -0.76463 0.29569 711.92758 -2.586 0.00991 **
did_practiceTRUE:topicVermenigvuldigen -0.83879 0.30763 713.07137 -2.727 0.00656 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation matrix not shown by default, as p = 20 > 12.
Use print(x, correlation=TRUE) or
vcov(x) if you need it
On some topics, performance on the pretest was already really high,
in which case we would not expect much improvement from practice. Let’s
look at the relation between pretest score and score change, taking into
account whether the student practiced or not:
ggplot(np_scores_and_practice, aes(x = score_test_1, y = score_test_2, colour = did_practice)) +
facet_wrap(~ topic, ncol = 5) +
geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_smooth(method = "lm") +
geom_point(alpha = .25) +
labs(x = "Pretest score", y = "Posttest score", colour = "Did the student\npractice the topic?", caption = "Data from noorderpoort.memorylab.app") +
theme_ml() +
coord_fixed(xlim = c(0, 4), ylim = c(0, 4))

Let’s look at performance during practice. Accuracy by topic:
ggplot(np_practice_stats, aes(x = as.character(topic), y = accuracy, fill = topic)) +
geom_boxplot() +
geom_jitter(width = .1, height = 0, alpha = .5) +
labs(x = "Topic", y = "Accuracy", fill = "Topic") +
scale_y_continuous(limits = c(.4, 1), labels = scales::percent) +
scale_fill_viridis_d() +
guides(fill = "none") +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey"))

Accuracy by user:
np_practice_stats[n_responses > 10, .(mean_accuracy = mean(accuracy), sd_accuracy = sd(accuracy)), by = .(user_id)]
ggplot(np_practice_stats[n_responses > 10], aes(x = reorder(as.character(user_id), accuracy), y = accuracy)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(aes(colour = as.character(topic)), width = .1, height = 0, alpha = .25) +
labs(x = "Student", y = "Accuratesse", colour = "Onderwerp") +
scale_y_continuous(limits = c(.4, 1), labels = scales::percent) +
scale_colour_viridis_d() +
guides(fill = "none") +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey"),
axis.text.x = element_blank(),
axis.ticks.x = element_blank())

Speed of forgetting by fact and topic:
np_sof <- np_responses[, .(final_alpha = alpha[which.max(presentation_start_time)]), by = .(text, topic, user_id)]
Error in `[.data.table`(np_responses, , .(final_alpha = alpha[which.max(presentation_start_time)]), :
column or expression 1 of 'by' or 'keyby' is type closure. Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]
ggplot(np_sof_avg[N > 10], aes(x = sof_mean, y = tidytext::reorder_within(text, sof_mean, as.character(topic)), alpha = N)) +
facet_grid(as.character(topic) ~ ., scales = "free_y") +
geom_errorbarh(aes(xmin = sof_mean - sof_se, xmax = sof_mean + sof_se), height = 0, colour = colours_memorylab[5]) +
geom_point(colour = colours_memorylab[5]) +
labs(y = "Feit", x = "Vergeetsnelheid (hoger = moeilijker)", alpha = "Geoefend door\naantal studenten") +
theme_ml() +
scale_x_continuous(limits = c(.1, .5)) +
tidytext::scale_y_reordered() +
theme(axis.text.y = element_text(size = 4),
panel.grid.major.x = element_line(colour = "grey90"))

ggsave(here("output", "sof_by_fact_and_topic.png"), height = 15, width = 8)
Alfa college
alfa_domain <- query_db("SELECT * FROM domain WHERE name = 'alfa.memorylab.app';", database = "slimstampen")
Users registered on this domain:
alfa_users <- query_db(paste0("SELECT id AS user_id FROM users WHERE domain_id = ", alfa_domain$id, ";"), database = "slimstampen")
Lessons on this domain:
alfa_lessons <- query_db(paste0("SELECT * FROM lesson WHERE domain_id = ", alfa_domain$id, ";"), database = "slimstampen")
Sessions from users on this domain during the pilot period:
alfa_sessions <- query_db(paste0("SELECT * FROM session WHERE token_id = 2 AND user_id IN (", paste(alfa_users$user_id, collapse = ", "), ");"), database = "ssaas")
alfa_sessions <- alfa_sessions[create_time > "2024-11-01"]
When were these sessions?
alfa_sessions[, session_date := as.Date(create_time)]
ggplot(alfa_sessions, aes(x = session_date)) +
geom_histogram(binwidth = 1, fill = colours_memorylab[1]) +
labs(x = "Date", y = "Sessions per day", caption = "Data from alfa.memorylab.app") +
scale_y_continuous(expand = c(0, 0)) +
theme_ml() +
theme(panel.grid.major.y = element_line(colour = "grey90"))
Most popular days:
alfa_sessions[, .N, by = .(session_date)][order(-N)]
Total sessions:
nrow(alfa_sessions)
Sessions per user:
alfa_sessions[, .N, by = user_id][order(-N)]
Total users with at least one session:
length(unique(alfa_sessions$user_id))
Which lessons did users do? Parse the session context:
alfa_sessions_parsed <- alfa_sessions[, map_dfr(context, fromJSON)] |> setDT()
alfa_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Les = title)][order(-`Keer geoefend`)] |> knitr::kable()
Mastery credits:
alfa_credits <- query_db(paste0("SELECT * FROM lesson_mastered WHERE user_id IN (", paste(alfa_users$user_id, collapse = ", "), ");"), database = "slimstampen")
Add lesson titles:
alfa_credits <- merge(alfa_credits, alfa_lessons[, .(lesson_id = id, title)])
alfa_credits[, .N, by = .(title)][order(-N)]
query_db(query = "SELECT *
FROM pg_catalog.pg_tables
WHERE schemaname != 'pg_catalog' AND
schemaname != 'information_schema';",
database = "slimstampen")
---
title: "Usage statistics"
subtitle: "MBO rekenen pilot 2024"
author: "Maarten van der Velde"
date: "Last updated: `r Sys.Date()`"
output:
  html_notebook:
    smart: no
    toc: yes
    toc_float: yes
  github_document:
    toc: yes
editor_options: 
  chunk_output_type: inline
---

```{r}
library(here)
library(data.table)
library(ggplot2)
library(jsonlite)
library(purrr)

theme_memorylab_url <- "https://raw.githubusercontent.com/SlimStampen/theme_memorylab/master/theme_memorylab.R"
source(theme_memorylab_url)

source(here("..", "databases", "database_functions.R"))
```

# Noorderpoort

```{r}
np_domain <- query_db("SELECT * FROM domain WHERE name = 'noorderpoort.memorylab.app';", database = "slimstampen")
```

Users registered on this domain:
```{r}
np_users <- query_db(paste0("SELECT id AS user_id, email FROM users WHERE domain_id = ", np_domain$id, ";"), database = "slimstampen")
```

Lessons on this domain:
```{r}
np_lessons <- query_db(paste0("SELECT * FROM lesson WHERE domain_id = ", np_domain$id, ";"), database = "slimstampen")
```

Sessions from users on this domain during the pilot period:
```{r}
np_sessions <- query_db(paste0("SELECT * FROM session WHERE token_id = 2 AND user_id IN (", paste(np_users$user_id, collapse = ", "), ");"), database = "ssaas")
np_sessions <- np_sessions[create_time > "2024-10-06"][create_time < "2024-11-05"]
```

When were these sessions?
```{r}
np_sessions[, session_date := as.Date(create_time)]

np_test_dates <- data.table(test = c("Toets 1", "Toets 2"),
                            date = as.Date(c("2024-10-07", "2024-11-04")))

ggplot(np_sessions, aes(x = session_date)) +
  geom_vline(data = np_test_dates, aes(xintercept = date), linetype = "dashed", colour = "grey50") +
  geom_label(data = np_test_dates, aes(x = date, y = Inf, label = test), vjust = 1.05, hjust = .5, colour = "grey50") +
  geom_histogram(binwidth = 1, fill = colours_memorylab[1]) +
  labs(x = "Datum", y = "MemoryLab oefensessies per dag", title = "MemoryLab oefenactiviteit over de tijd") +
  scale_x_date(date_labels = "%d/%m", date_breaks = "1 week", limits = c(as.Date("2024-10-06"), as.Date("2024-11-05"))) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey90"))

ggsave(here("output", "memorylab_oefensessies_over_tijd.png"), width = 10, height = 5)
```

```{r}
```

Most popular days:
```{r}
np_sessions[, .N, by = .(session_date)][order(-N)]
```
Total sessions:
```{r}
nrow(np_sessions)
```

Sessions per user:
```{r}
np_sessions[, .N, by = user_id][order(-N)]
```

Total users with at least one session:
```{r}
length(unique(np_sessions$user_id))
```


Which lessons did users do? Parse the session context:
```{r}
np_sessions[, context_parsed := map(context, function (x) {
  x <- fromJSON(x)
  data.table(lesson_id = x$lessonId,
             lesson_group_id = x$lessonGroupId,
             title = x$title)
})]

np_sessions_parsed <- np_sessions[, rbindlist(context_parsed), by = .(session_id = id, user_id, create_time, session_date)]
```

Practice by lesson:
```{r}
np_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Les = title)][order(-`Keer geoefend`)] |> knitr::kable()
```

We can link each practice session to one of the test topics:
```{r}
np_lesson_groups <- data.table(
  lesson_group_id = c(
    29092,
    29087,
    29096,
    29089,
    29098,
    29086,
    29097,
    29095,
    29090,
    29088,
    29094,
    29091),
  topic = c(
    "Rekentaal",
    "Breuken",
    "Percentage",
    "Eenheden",
    "Delen",
    "Afronden",
    "Cijfers",
    "Vermenigvuldigen",
    "Aftrekken & Optellen",
    "Decimalen",
    "Tafels",
    "Rekentaal"
  )
)

# Set lesson topics to the right order
np_lesson_groups[, topic := factor(topic, levels = c("Delen",
                                                     "Percentage",
                                                     "Cijfers",
                                                     "Breuken",
                                                     "Tafels",
                                                     "Decimalen",
                                                     "Aftrekken & Optellen",
                                                     "Vermenigvuldigen",
                                                     "Afronden",
                                                     "Eenheden",
                                                     "Rekentaal"))]


np_lessons <- merge(np_lessons, np_lesson_groups, by = "lesson_group_id")
np_sessions_parsed <- merge(np_sessions_parsed, np_lesson_groups, by = "lesson_group_id")
```

Practice by lesson topic:
```{r}
np_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Onderwerp = topic)][order(-`Keer geoefend`)] |> knitr::kable()
```

Bar plot of sessions per topic:
```{r}
np_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Onderwerp = topic)] |>
  ggplot(aes(x = Onderwerp, y = `Keer geoefend`)) +
  geom_col(fill = colours_memorylab[1]) +
  labs(x = "Onderwerp", y = "Keer geoefend", caption = "Data from noorderpoort.memorylab.app") +
  theme_ml()
```


Mastery credits:
```{r}
np_credits <- query_db(paste0("SELECT * FROM lesson_mastered WHERE user_id IN (", paste(np_users$user_id, collapse = ", "), ");"), database = "slimstampen")

# Add lesson titles and topics
np_credits <- merge(np_credits, np_lessons[, .(lesson_id = id, title, topic)])
```

Credits by topic:
```{r}
np_credits[, .N, by = .(topic)][order(-N)]
```

Responses from these user_ids:
```{r}
np_responses <- query_db(paste0("SELECT * FROM response WHERE token_id = 2 AND user_id IN (", paste(np_users$user_id, collapse = ", "), ");"), database = "ssaas")
np_responses <- np_responses[create_time > "2024-10-06"][create_time < "2024-11-05"]

# Add lesson titles and topics
np_responses <- merge(np_responses, np_sessions_parsed[, .(session_id, title, topic)], by = "session_id")
```

Responses by user:
```{r}
np_responses[, .N, by = .(user_id)][order(-N)]
```

Responses by topic:
```{r}
np_responses[, .N, by = .(topic)][order(-N)]
```





```{r}
ggplot(np_responses, aes(x = as.Date(create_time))) +
  geom_histogram(binwidth = 1, fill = colours_memorylab[1]) +
  labs(x = "Date", y = "Responses per day", caption = "Data from noorderpoort.memorylab.app") +
  scale_y_continuous(expand = c(0, 0), labels = scales::number_format(big.mark = ",")) +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey90"))
```
 
Split by lesson topic:
```{r}
ggplot(np_responses, aes(x = as.Date(create_time))) +
  geom_histogram(aes(fill = topic), binwidth = 1) +
  labs(x = "Date", y = "Responses per day", fill = "Onderwerp", caption = "Data from noorderpoort.memorylab.app") +
  scale_y_continuous(expand = c(0, 0), labels = scales::number_format(big.mark = ",")) +
  scale_fill_viridis_d() +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey90"))

```
 

## How does study behaviour on specific topics relate to test performance?

We want to see whether studying a specific topic is related to an increase in test performance.
Studying behaviour can be summarised in several ways: time spent, number of sessions, number of questions answered, number of credits achieved.

```{r}
np_session_stats <- np_responses[, .(
  n_responses = .N,
  duration = max(presentation_start_time) + presentation_duration[which.max(presentation_start_time)] - min(presentation_start_time),
  accuracy = mean(correct)
), by = .(user_id, topic, session_id)]

np_practice_stats <- np_session_stats[, .(
  n_sessions = .N,
  n_responses = sum(n_responses),
  duration = sum(duration),
  accuracy = mean(accuracy)
), by = .(user_id, topic)]
```

Load test scores per topic:
```{r}
np_test_scores <- fread(here("data", "test", "noorderpoort_scores_by_topic.csv"))
np_test_scores[, Email := tolower(trimws(Email))]

# Link to MemoryLab user IDs
np_test_scores <- merge(np_test_scores, np_users, by.x = "Email", by.y = "email", all = TRUE)
```

There are some test scores for which we don't have any associated MemoryLab data:
```{r}
np_test_scores[is.na(user_id), .(unique(Email))]
```

There are also some MemoryLab users for which we don't have any associated test scores:
```{r}
np_test_scores[is.na(component), .(unique(Email))]
```

For this analysis we'll only include users of whom we have two test scores as well as some MemoryLab practice data.
```{r}
np_test_scores[, did_ml := !is.na(user_id)]
np_test_scores[, two_tests := uniqueN(test) == 2, by = .(user_id)]
np_test_scores[, include_user := did_ml & two_tests]
```

Mean test scores from included students:
```{r}
np_test_scores[include_user == TRUE & component == "Totaal punten", .(mean_grade = 10*mean(score)/40), by = test]
```

Distribution of test scores:
```{r}
np_test_scores[include_user == TRUE & component == "Totaal punten", .(test, score)] |>
  ggplot(aes(x = 10*score/40, fill = test)) +
  geom_density(alpha = 0.5) +
  labs(x = "Score", y = "Density", fill = "Test", caption = "Data from noorderpoort.memorylab.app") +
  scale_x_continuous(breaks = seq(0, 10, 1), limits = c(0, 10)) +
  theme_ml()
```

Do a paired t-test to show that the difference is significant:
```{r}
test_score_dat <- np_test_scores[include_user == TRUE & component == "Totaal punten", .(user_id, test, score)] |>
  dcast(user_id ~ test, value.var = "score")

test_score_dat

t.test(test_score_dat$Posttest, test_score_dat$Pretest, paired = TRUE)
```



Combine data:
```{r}
np_scores <- np_test_scores[include_user == TRUE & !component %in% c("Totaal punten", "Cijfer"), .(
  user_id,
  topic = component,
  score,
  test
)]
np_scores <- dcast(np_scores, user_id + topic ~ test, value.var = "score")
setnames(np_scores, c("Posttest", "Pretest"), c("score_test_2", "score_test_1"))
np_scores[, score_test_change := score_test_2 - score_test_1]
# np_scores[, topic := factor(topic, levels = c("Delen",
#                                               "Percentage",
#                                               "Cijfers",
#                                               "Breuken",
#                                               "Tafels",
#                                               "Decimalen",
#                                               "Aftrekken & Optellen",
#                                               "Vermenigvuldigen",
#                                               "Afronden",
#                                               "Eenheden",
#                                               "Rekentaal"))]

np_scores_and_practice <-  merge(np_scores, np_practice_stats, by = c("user_id", "topic"), all.x = TRUE)

# If a user has no practice data, we'll fill in zeros
np_scores_and_practice[is.na(n_sessions), n_sessions := 0]
np_scores_and_practice[is.na(n_responses), n_responses := 0]
np_scores_and_practice[is.na(duration), duration := 0]
```


Plot of scores:
```{r}
mean_scores <- np_scores_and_practice[, .(score_test_1 = mean(score_test_1), score_test_2 = mean(score_test_2)), by = .(topic)] |>
  melt(id.vars = "topic", variable.name = "test", value.name = "score")

p_scores <- melt(np_scores_and_practice, measure.vars = c("score_test_1", "score_test_2"), variable.name = "test", value.name = "score") |>
  ggplot(aes(x = test, y = score)) +
  facet_wrap(~ topic, ncol = 5) +
  geom_point(alpha = .4, size = .5) +
  geom_line(aes(group = user_id), alpha = .4, lty = 3) +
  geom_point(data = mean_scores, colour = colours_memorylab[1], size = 2.5) +
  geom_line(data = mean_scores, aes(group = topic), colour = colours_memorylab[1], lwd = 1) +
  scale_x_discrete(labels = c("1", "2")) +
  labs(x = "Toetsmoment", y = "Score", title = "Toetsscores") +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey90"),
        strip.text = element_text(face = "bold")
        )

p_scores

ggsave(here("output", "testscores_noorderpoort.png"), width = 8, height = 5)

```

How much was each topic practiced?
```{r}
ggplot(np_scores_and_practice, aes(x = n_responses)) +
  facet_wrap(~ topic, ncol = 5) +
  geom_histogram(binwidth = 20, fill = colours_memorylab[1]) +
  labs(x = "Number of practice responses per student", y = "Frequency", colour = "Topic", caption = "Data from noorderpoort.memorylab.app") +
  theme_ml()
```

Did students choose to practice topics on which their pretest score was low?
```{r}
# Add mean pretest scores per topic
mean_pretest_scores <- mean_scores[test == "score_test_1", .(topic, mean_score = round(score, 2))]
np_scores_and_practice <- merge(np_scores_and_practice, mean_pretest_scores, by = "topic")
np_scores_and_practice[, topic_label := paste0(topic, "\n(Gemiddelde score: ", mean_score, ")")]

ggplot(np_scores_and_practice, aes(x = score_test_1, y = n_responses)) +
  facet_wrap(~ topic_label, ncol = 5) +
  geom_point(aes(fill = as.factor(score_test_1)), colour = "black", alpha = .8, position = position_jitter(height = 0, width = .1, seed = 0), pch = 21) +
  scale_fill_brewer(palette = "RdYlGn") +
  guides(fill = "none") +
  labs(x = "Score op Toets 1", y = "Aantal gemaakte MemoryLab oefeningen", colour = "Onderwerp", caption = "noorderpoort.memorylab.app") +
  theme_ml() +
  theme(panel.grid.major.x = element_line(colour = "grey90"),
        panel.grid.major.y = element_line(colour = "grey90"),
        strip.text = element_text(face = "bold"))
```

Interpretation: not really.


Same plot but with totals instead of individual values:
```{r}
p_practice <- np_scores_and_practice[, .(n_sessions_total = sum(n_sessions)), by = .(topic_label, score_test_1)] |>
  ggplot(aes(x = score_test_1, y = n_sessions_total)) +
  facet_wrap(~ topic_label, ncol = 5) +
  geom_col(aes(fill = as.factor(score_test_1)), colour = "black", alpha = .8) +
  scale_fill_brewer(palette = "RdYlGn") +
  guides(fill = "none") +
  labs(x = "Score op Toets 1", y = "Aantal MemoryLab oefensessies", colour = "Onderwerp", title = "Oefenactiviteit") +
  theme_ml() +
  theme(panel.grid.major.x = element_line(colour = "grey90"),
        panel.grid.major.y = element_line(colour = "grey90"),
        strip.text = element_text(face = "bold"))

p_practice

ggsave(here("output", "memorylab_oefensessies_noorderpoort.png"), width = 9, height = 5)
```


Combined plot:
```{r}
library(patchwork)

p_scores + p_practice + plot_layout(ncol = 1)
ggsave(here("output", "memorylab_oefening_en_scores_noorderpoort.png"), width = 10, height = 10)
```



Is there a relation between score change and the number of practice sessions?
```{r}
np_scores_and_practice[, .(n_sessions = sum(n_sessions), score_test_change = mean(score_test_change)), by = .(topic_label)]

ggplot(np_scores_and_practice, aes(x = n_sessions, y = score_test_change)) +
  facet_wrap(~ topic) +
  geom_smooth(method = "lm", colour = colours_memorylab[1]) +
  geom_point(alpha = .25) +
  labs(x = "Number of practice sessions", y = "Change in test score", colour = "Topic", caption = "Data from noorderpoort.memorylab.app") +
  scale_colour_viridis_d() +
  theme_ml()
```

It looks like there might be a positive effect of practice.
Let's look at it more simply:
Is there a relation between score change and whether or not the student has practiced the topic at all?
```{r}
np_scores_and_practice[, did_practice := n_responses > 0]

avg_score_change <- np_scores_and_practice[, .(mean_score_test_change = mean(score_test_change)), by = .(topic, topic_label, did_practice)]

ggplot(np_scores_and_practice, aes(x = did_practice, y = score_test_change)) +
  facet_wrap(~ topic) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_violin(fill = "midnightblue", width = .25, alpha = .8) +
  geom_point(data = avg_score_change, aes(y = mean_score_test_change), size = 2, fill = "white", pch = 21) +
  geom_label(data = avg_score_change, aes(y = mean_score_test_change, label = round(mean_score_test_change, 2)), nudge_x = .33) +
  scale_x_discrete(labels = c("No", "Yes")) +
  labs(x = "Did the student practice the topic?", y = "Change in test score", colour = "Topic", caption = "Data from noorderpoort.memorylab.app") +
  theme_ml()

```

Average change:
```{r}
np_scores_and_practice[, .(N = .N, mean_score_test_change = mean(score_test_change), sd_score_test_change = sd(score_test_change)), by = .(topic, topic_label, did_practice)]
```

Is there a significant difference in score change between students who practiced and those who didn't, taking into account differences in score on the first test?
```{r}
library(lmerTest)

lmer(score_test_change ~ did_practice*score_test_1 + (1 | user_id), data = np_scores_and_practice) |>
  summary()
```
Yes: practicing is associated with an increase in score change of .96; scoring a point higher on test 1 is associated with a lower score change (-.30).

```{r}
lmer(score_test_change ~ did_practice*topic + (1 | user_id), data = np_scores_and_practice) |>
  summary()
```




On some topics, performance on the pretest was already really high, in which case we would not expect much improvement from practice.
Let's look at the relation between pretest score and score change, taking into account whether the student practiced or not:
```{r}
ggplot(np_scores_and_practice, aes(x = score_test_1, y = score_test_2, colour = did_practice)) +
  facet_wrap(~ topic, ncol = 5) +
  geom_abline(intercept = 0, slope = 1, linetype = 2) +
  geom_smooth(method = "lm") +
  geom_point(alpha = .25) +
  labs(x = "Pretest score", y = "Posttest score", colour = "Did the student\npractice the topic?", caption = "Data from noorderpoort.memorylab.app") +
  theme_ml() +
  coord_fixed(xlim = c(0, 4), ylim = c(0, 4))
```


Let's look at performance during practice.
Accuracy by topic:
```{r}
ggplot(np_practice_stats, aes(x = as.character(topic), y = accuracy, fill = topic)) +
  geom_boxplot() +
  geom_jitter(width = .1, height = 0, alpha = .5) +
  labs(x = "Topic", y = "Accuracy", fill = "Topic") +
  scale_y_continuous(limits = c(.4, 1), labels = scales::percent) +
  scale_fill_viridis_d() +
  guides(fill = "none") +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey"))
```

Accuracy by user:
```{r}
np_practice_stats[n_responses > 10, .(mean_accuracy = mean(accuracy), sd_accuracy = sd(accuracy)), by = .(user_id)]

ggplot(np_practice_stats[n_responses > 10], aes(x = reorder(as.character(user_id), accuracy), y = accuracy)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(colour = as.character(topic)), width = .1, height = 0, alpha = .25) +
  labs(x = "Student", y = "Accuratesse", colour = "Onderwerp") +
  scale_y_continuous(limits = c(.4, 1), labels = scales::percent) +
  scale_colour_viridis_d() +
  guides(fill = "none") +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey"),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
```

Speed of forgetting by fact and topic:
```{r}
np_fact_ids <- unique(np_responses$fact_id)
np_facts <- query_db(paste0("SELECT id AS fact_id, text FROM fact WHERE id IN (", paste(np_fact_ids, collapse = ", "), ")"), database = "ssaas")
np_facts[, text := gsub("\\+", " ", text)]
np_facts[, text := URLdecode(text)]
np_facts[, text := gsub("\n", " ", text, fixed = TRUE)]
np_facts[, text := gsub("＋", "+", text, fixed = TRUE)]

np_responses <- merge(np_responses, np_facts, by = "fact_id")

np_sof <- np_responses[, .(final_alpha = alpha[which.max(presentation_start_time)]), by = .(text, topic, user_id)]
np_sof_avg <- np_sof[, .(N = .N, sof_mean = mean(final_alpha), sof_se = sd(final_alpha)/sqrt(.N)), by = .(text, topic)]
```

```{r fig.height = 15, fig.width = 8}
ggplot(np_sof_avg[N > 10], aes(x = sof_mean, y = tidytext::reorder_within(text, sof_mean, as.character(topic)), alpha = N)) +
  facet_grid(as.character(topic) ~ ., scales = "free_y") +
  geom_errorbarh(aes(xmin = sof_mean - sof_se, xmax = sof_mean + sof_se), height = 0, colour = colours_memorylab[5]) +
  geom_point(colour = colours_memorylab[5]) +
  labs(y = "Feit", x = "Vergeetsnelheid (hoger = moeilijker)", alpha = "Geoefend door\naantal studenten") +
  theme_ml() +
  scale_x_continuous(limits = c(.1, .5)) +
  tidytext::scale_y_reordered() +
  theme(axis.text.y = element_text(size = 4),
        panel.grid.major.x = element_line(colour = "grey90"))

ggsave(here("output", "sof_by_fact_and_topic.png"), height = 15, width = 8)
```




# Conclusies Noorderpoort

Studenten scoren gemiddeld lager op de posttest dan op de nulmeting.

Er zijn een aantal factoren die een rol spelen:

   - Studenten hebben niet zo veel geoefend.
   - De oefening die wel gebeurde vond over het algemeen vrij ver van de posttest plaats.
   - Het startniveau van deze studenten was al vrij hoog, waardoor er minder ruimte voor verbetering was, en extra oefening in deze vorm wellicht niet zo zinvol was.
   - Als we kijken naar de combinatie van testscores en oefenactiviteit op individuele onderdelen, zien we een aantal patronen:
    - Veel oefening gebeurde op onderdelen waar het startniveau al hoog was, zoals Afronden, Aftrekken & Optellen, 


---


# Alfa college

```{r}
alfa_domain <- query_db("SELECT * FROM domain WHERE name = 'alfa.memorylab.app';", database = "slimstampen")
```

Users registered on this domain:
```{r}
alfa_users <- query_db(paste0("SELECT id AS user_id FROM users WHERE domain_id = ", alfa_domain$id, ";"), database = "slimstampen")
```

Lessons on this domain:
```{r}
alfa_lessons <- query_db(paste0("SELECT * FROM lesson WHERE domain_id = ", alfa_domain$id, ";"), database = "slimstampen")
```

Sessions from users on this domain during the pilot period:
```{r}
alfa_sessions <- query_db(paste0("SELECT * FROM session WHERE token_id = 2 AND user_id IN (", paste(alfa_users$user_id, collapse = ", "), ");"), database = "ssaas")
alfa_sessions <- alfa_sessions[create_time > "2024-11-01"]
```

When were these sessions?
```{r}
alfa_sessions[, session_date := as.Date(create_time)]

ggplot(alfa_sessions, aes(x = session_date)) +
  geom_histogram(binwidth = 1, fill = colours_memorylab[1]) +
  labs(x = "Date", y = "Sessions per day", caption = "Data from alfa.memorylab.app") +
  scale_y_continuous(expand = c(0, 0)) +
  theme_ml() +
  theme(panel.grid.major.y = element_line(colour = "grey90"))
```

Most popular days:
```{r}
alfa_sessions[, .N, by = .(session_date)][order(-N)]
```
Total sessions:
```{r}
nrow(alfa_sessions)
```

Sessions per user:
```{r}
alfa_sessions[, .N, by = user_id][order(-N)]
```

Total users with at least one session:
```{r}
length(unique(alfa_sessions$user_id))
```


Which lessons did users do? Parse the session context:
```{r}
alfa_sessions_parsed <- alfa_sessions[, map_dfr(context, fromJSON)] |> setDT()
```

```{r}
alfa_sessions_parsed[, .(`Keer geoefend` = .N), by = .(Les = title)][order(-`Keer geoefend`)] |> knitr::kable()
```

Mastery credits:
```{r}
alfa_credits <- query_db(paste0("SELECT * FROM lesson_mastered WHERE user_id IN (", paste(alfa_users$user_id, collapse = ", "), ");"), database = "slimstampen")
```

Add lesson titles:
```{r}
alfa_credits <- merge(alfa_credits, alfa_lessons[, .(lesson_id = id, title)])
```

```{r}
alfa_credits[, .N, by = .(title)][order(-N)]
```




```{r}
query_db(query = "SELECT *
FROM pg_catalog.pg_tables
WHERE schemaname != 'pg_catalog' AND 
    schemaname != 'information_schema';",
         database = "slimstampen")
```

